Uploading Files into R and Adding appropriate libraries
# Attach the plotting library and read the two input tables.
library(ggplot2)
# NOTE(review): hard-coded, machine-specific working directory; the relative
# file names below are resolved against it.
setwd("/Users/chanceyan/Documents/R/ThesisCrab")
# `header = TRUE` is spelled out: `h = T` relied on partial argument matching
# and on `T`, which is an ordinary variable that can be reassigned.
crabdata <- read.csv("Feeding.csv", header = TRUE)
master <- read.csv("Master.csv", header = TRUE)
We need to clean up the data first to use it.
# Coerce the food weights to numeric. Entries that cannot be parsed as numbers
# become NA, which is what produces the coercion warning recorded below.
crabdata$FoodIn <- as.numeric(as.character(crabdata$FoodIn))
## Warning: NAs introduced by coercion
crabdata$FoodOut <- as.numeric(as.character(crabdata$FoodOut))
# Drop the rows whose weights could not be parsed.
# The previous row-by-row loop tested `is.numeric(crabdata[i, 4])` and
# `is.numeric(crabdata[i, 6])`. That reports the storage type of the whole
# column, so it gives the same answer for every row and never detects an
# unparseable value. It also deleted rows while iterating over
# `1:nrow(crabdata)`, which shifts the remaining rows under the loop index.
# NOTE(review): assumes columns 4 and 6 were FoodIn and FoodOut -- confirm.
valid_weights <- !is.na(crabdata$FoodIn) & !is.na(crabdata$FoodOut)
crabdata <- crabdata[valid_weights, ]
# Give each crab a short ID taken from characters 3-5 of its ID, so later code
# can refer to individuals more easily. substr() is vectorised, so this works
# for any number of rows in `master`; the previous loop was hard-coded to
# exactly 36 rows and filled the column one element at a time.
master$num <- substr(master$ID, 3, 5)
# Amount eaten per feeding record.
crabdata$amount.eaten <- crabdata$FoodIn - crabdata$FoodOut
# Join the feeding records to the per-crab table on ID. Columns that exist in
# both tables come back with .x (crabdata) and .y (master) suffixes.
all <- merge(crabdata, master, by = "ID")
# Amount eaten as a proportion of WW, so individuals are compared on a fair
# metric.
all$proportion.eaten <- all$amount.eaten / all$WW
# Parse the month/day/4-digit-year strings into Date objects.
all$Date.x <- as.Date(all$Date.x, format = "%m/%d/%Y")
#This code will graph each individual
#for(i in 1:36){
# craby <- subset(all, all$num == i)
# graph <- ggplot(aes(x = as.factor(Date.x), y = proportion.eaten), data = craby) + geom_point() + ggtitle(craby$ID)
# print(graph)
#}
Graphing each individual clearly.
# Number of records per Species.x (rows) and Trial.x (columns).
table(all[["Species.x"]], all[["Trial.x"]])
##
## 16 18 20 22 24 26
## CI 61 54 90 101 88 91
## CM 60 54 90 108 78 105
## HS 59 54 90 119 90 102
# Per-individual time series: proportion eaten against date, one coloured
# point-and-line trace per crab ID, one panel per trial. The mapping and the
# layer stack are shared by the three species figures below.
craby_aes <- aes(x = Date.x, y = proportion.eaten, color = ID, group = ID)
craby_layers <- list(geom_point(), geom_line(), facet_wrap(~Trial.x))
# Species code "CI"
craby <- subset(all, Species.x == "CI")
ggplot(craby, craby_aes) + craby_layers
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 15 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Species code "CM"
craby <- subset(all, Species.x == "CM")
ggplot(craby, craby_aes) + craby_layers
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Species code "HS"
craby <- subset(all, Species.x == "HS")
ggplot(craby, craby_aes) + craby_layers
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Graphing everyone: boxplots of the proportion eaten on each date, filled by
# Food, with Period as facet rows and Species.x as facet columns. One figure
# is drawn per trial (titled with the trial), then one for all trials pooled.
trial_aes <- aes(x = as.factor(Date.x), y = proportion.eaten, fill = Food)
trial_layers <- list(geom_boxplot(), facet_grid(Period ~ Species.x))
holddf <- subset(all, Trial.x == "16")
ggplot(holddf, trial_aes) + trial_layers + ggtitle("16")
holddf <- subset(all, Trial.x == "18")
ggplot(holddf, trial_aes) + trial_layers + ggtitle("18")
holddf <- subset(all, Trial.x == "20")
ggplot(holddf, trial_aes) + trial_layers + ggtitle("20")
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
holddf <- subset(all, Trial.x == "22")
ggplot(holddf, trial_aes) + trial_layers + ggtitle("22")
holddf <- subset(all, Trial.x == "24")
ggplot(holddf, trial_aes) + trial_layers + ggtitle("24")
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
holddf <- subset(all, Trial.x == "26")
ggplot(holddf, trial_aes) + trial_layers + ggtitle("26")
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# All trials pooled (no title).
ggplot(all, trial_aes) + trial_layers
## Warning: Removed 41 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Graphing by species so it's easier to see: boxplots of the proportion eaten
# on each date, filled by Period, one panel per Trial.y with its own x axis.
period_aes <- aes(
  x = as.factor(Date.x),
  y = proportion.eaten,
  fill = as.factor(Period)
)
period_layers <- list(geom_boxplot(), facet_wrap(~Trial.y, scales = "free_x"))
holddf <- subset(all, Species.y == "Irroratus")
ggplot(holddf, period_aes) + period_layers + ggtitle("Cancer")
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
holddf <- subset(all, Species.y == "Carcinus")
ggplot(holddf, period_aes) + period_layers + ggtitle("Green Crab")
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
holddf <- subset(all, Species.y == "Hemigrapsus")
ggplot(holddf, period_aes) + period_layers + ggtitle("Hemi")
## Warning: Removed 14 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
The data do not look clean, so I will clean them first by removing the records of crabs that ate everything. I will also set those that did not eat to 0.
# Clean the merged feeding records using the codes in Notes.x and the computed
# amount eaten:
#   * note contains "DE", or amount.eaten is negative -> proportion.eaten = 0
#   * note contains "AE" or "DI", or amount.eaten is NA -> record removed
# The rules are applied to whole columns at once. The previous version deleted
# rows inside a `for (i in 1:nrow(all))` loop: after each deletion the next
# record slid into position i, so the remaining checks of that iteration ran
# against a different record and that record was then skipped by the loop.
# grepl() returns FALSE for missing notes, so NA notes never match a code.
did_not_eat <- grepl("DE", all$Notes.x, fixed = TRUE)
negative_amount <- !is.na(all$amount.eaten) & all$amount.eaten < 0
all$proportion.eaten[did_not_eat | negative_amount] <- 0
drop_row <- grepl("AE", all$Notes.x, fixed = TRUE) |
  grepl("DI", all$Notes.x, fixed = TRUE) |
  is.na(all$amount.eaten)
all <- all[!drop_row, ]
# Re-running the per-species graphs on the cleaned data: boxplots of the
# proportion eaten on each date, filled by Period, one panel per Trial.y.
period_aes <- aes(
  x = as.factor(Date.x),
  y = proportion.eaten,
  fill = as.factor(Period)
)
period_layers <- list(geom_boxplot(), facet_wrap(~Trial.y, scales = "free_x"))
holddf <- subset(all, Species.y == "Irroratus")
ggplot(holddf, period_aes) + period_layers + ggtitle("Cancer")
holddf <- subset(all, Species.y == "Carcinus")
ggplot(holddf, period_aes) + period_layers + ggtitle("Green Crab")
holddf <- subset(all, Species.y == "Hemigrapsus")
ggplot(holddf, period_aes) + period_layers + ggtitle("Hemi")
Looking better! But maybe we can still adjust outliers. Let’s try removing those that molted.
# Remove every record of any crab that has at least one note containing "ME"
# (the accompanying text describes these as crabs that molted).
# The previous version collected row numbers in `array` with a nested scan and
# then ran `all[-array, ]`. When no note matched, `array` stayed NULL and
# `all[-NULL, ]` selected zero rows, emptying the table. Filtering with a
# logical mask is safe when nothing matches and avoids the quadratic scan.
molted_ids <- unique(all$ID[grepl("ME", all$Notes.x, fixed = TRUE)])
all <- all[!(all$ID %in% molted_ids), ]
# Kept from the original script, which reset this scratch variable here.
array <- c()
# Re-running the per-species graphs after removing the molted crabs: boxplots
# of the proportion eaten on each date, filled by Period, one panel per
# Trial.y.
period_aes <- aes(
  x = as.factor(Date.x),
  y = proportion.eaten,
  fill = as.factor(Period)
)
period_layers <- list(geom_boxplot(), facet_wrap(~Trial.y, scales = "free_x"))
holddf <- subset(all, Species.y == "Irroratus")
ggplot(holddf, period_aes) + period_layers + ggtitle("Cancer")
holddf <- subset(all, Species.y == "Carcinus")
ggplot(holddf, period_aes) + period_layers + ggtitle("Green Crab")
holddf <- subset(all, Species.y == "Hemigrapsus")
ggplot(holddf, period_aes) + period_layers + ggtitle("Hemi")
Additional things to look at - size to proportion eaten
# CL against WW, coloured by species.
ggplot(data = all, mapping = aes(x = WW, y = CL, color = Species.x)) +
  geom_point()
# Proportion eaten against CL with a linear fit, one panel per species.
ggplot(
  data = all,
  mapping = aes(x = CL, y = proportion.eaten, color = Species.x)
) +
  geom_point() +
  facet_wrap(~Species.x, scales = "free_x") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
# Proportion eaten against WW, coloured by Period with a linear fit per
# Period, one panel per Trial.y; one figure per species.
ww_period_aes <- aes(
  x = WW,
  y = proportion.eaten,
  color = as.factor(Period)
)
holddf <- subset(all, Species.y == "Irroratus")
ggplot(holddf, ww_period_aes) +
  geom_point() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Cancer") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
holddf <- subset(all, Species.y == "Carcinus")
ggplot(holddf, ww_period_aes) +
  geom_point() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Carcinus") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
holddf <- subset(all, Species.y == "Hemigrapsus")
ggplot(holddf, ww_period_aes) +
  geom_point() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Hemigrapsus") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
# All species in one panel with a loess smoother per species.
ggplot(
  data = all,
  mapping = aes(x = WW, y = proportion.eaten, color = Species.x)
) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
# One panel per species with a black linear fit.
ggplot(
  data = all,
  mapping = aes(x = WW, y = proportion.eaten, color = Species.x)
) +
  geom_point() +
  geom_smooth(method = "lm", color = "black") +
  facet_wrap(~Species.x, scales = "free_x")
## `geom_smooth()` using formula = 'y ~ x'
Average the values from the last 3 dates of each species, trial, and period,
and compare them among species and trials.
# Accumulators for one summary row per species x trial x period. Each starts
# with a single NA placeholder, which na.omit() strips once the data frame is
# assembled (together with any other row that contains an NA).
avg_list <- NA
species_list <- NA
trial_list <- NA
period_list <- NA
sample_list <- NA
preload_spp <- c("CI", "CM", "HS")
preload_tr <- c("26", "24", "22", "20", "18", "16")
for (spp_code in preload_spp) {
  for (trial_code in preload_tr) {
    for (i in 1:2) {
      # Records of this species, trial and period.
      holddf <- subset(
        all,
        Species.x == spp_code & Trial.x == trial_code & Period == i
      )
      holddf$Date.x <- as.Date(holddf$Date.x)
      # Keep only the records from the (up to) three latest distinct dates.
      last.dates <- tail(sort(unique(holddf$Date.x)), 3)
      holddf <- subset(holddf, Date.x %in% last.dates)
      # Record the mean proportion eaten, the group labels taken from the
      # first remaining row, and the number of rows with no NA in any column.
      avg_list <- c(avg_list, mean(holddf$proportion.eaten))
      species_list <- c(species_list, holddf$Species.x[1])
      trial_list <- c(trial_list, holddf$Trial.x[1])
      period_list <- c(period_list, holddf$Period[1])
      sample_list <- c(sample_list, nrow(na.omit(holddf)))
    }
  }
}
average_df <- na.omit(
  data.frame(avg_list, species_list, trial_list, period_list, sample_list)
)
# Group averages by species, coloured by trial: pooled, then split by period.
average_aes <- aes(
  x = species_list,
  y = avg_list,
  color = as.factor(trial_list)
)
ggplot(average_df, average_aes) + geom_boxplot()
ggplot(average_df, average_aes) + geom_boxplot() + facet_wrap(~period_list)
Checking Sample size
# Records remaining per Species.x (rows) and Trial.x (columns) after cleaning.
table(all[["Species.x"]], all[["Trial.x"]])
##
## 16 18 20 22 24 26
## CI 60 54 82 100 80 91
## CM 54 52 82 94 74 95
## HS 52 54 74 101 75 65
Let’s look into the effect of size on the proportion eaten.
# Simple linear regression of the proportion eaten on WW; print the fitted
# intercept and slope.
# NOTE(review): `lm(proportion.eaten ~ WW, data = all)` would be the more
# idiomatic form, but it renames the coefficient from `all$WW` to `WW`.
mod <- lm(formula = all$proportion.eaten ~ all$WW)
coef(mod)
## (Intercept) all$WW
## 0.0918113225 -0.0003709927
# Model of the proportion eaten with WW, species and trial as additive
# predictors. No `family` is given, so glm() fits the default gaussian model
# (see the dispersion line in the summary). There is no interaction term.
mod1 <- glm(
  formula = all$proportion.eaten ~ all$WW + all$Species.x + all$Trial.x
)
summary(mod1)
##
## Call:
## glm(formula = all$proportion.eaten ~ all$WW + all$Species.x +
## all$Trial.x)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.230e-03 9.537e-03 -0.339 0.73489
## all$WW -8.067e-05 2.658e-05 -3.035 0.00245 **
## all$Species.xCM 1.735e-02 4.314e-03 4.022 6.09e-05 ***
## all$Species.xHS 8.057e-02 5.932e-03 13.584 < 2e-16 ***
## all$Trial.x 1.656e-03 3.600e-04 4.598 4.66e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.001826013)
##
## Null deviance: 4.6314 on 1338 degrees of freedom
## Residual deviance: 2.4359 on 1334 degrees of freedom
## AIC: -4636.3
##
## Number of Fisher Scoring iterations: 2
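It seems that weight has an effect on the proportional amount eaten. The proportional amount eaten also differs between species; note that the model above has no weight-by-species interaction term, so it estimates a separate intercept for each species rather than testing whether the slope between weight and proportional amount eaten changes between species. Furthermore, the trial that each individual is in affects the proportional amount eaten.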